This script is used to produce the analyses of the paper. Our data is preprocessed in the data_processing.Rmd file.

Last edit: 30/03/2024

##Setup

Figure 1: PCR models

Fitting exponential and logistic PCR models to qPCR data (SybrGreen) of Capsella bursa-pastoris. Data and script are described in Supplementary File 1.

qPCR data

Converting RFU to number of molecules for illustrative purpose (scaling).

#Open file containing qPCR data
ampli <- read.csv("data/20210712_101200_CT030043_SPER01_ANALYSE -  Quantification Amplification Results_SYBR.csv")[,-1]

ampli <- ampli %>% pivot_longer(2:ncol(ampli))
colnames(ampli)[2:3]<- c("Well","RFU")
ampli <- ampli[ampli$Well == "B7",] #B7: species Cbp, sample 26

K <- 1.32e11 #Charge capacity, computed below

#Convert RFU to molecules
a <- K/(max(ampli$RFU)-min(ampli$RFU))
b <- -a*min(ampli$RFU)
ampli$molecules <- a * ampli$RFU + b

data_ampli <- as.matrix(ampli[,c("Cycle", "RFU")])
data_ampli[,1] <- data_ampli[,1] - 1

ncycles <- nrow(ampli)

Fitting PCR models

Parameters are fitted numerically. No biological consideration here. The function (pcrfit) and the data structure used come from R package qpcR.

Non-zero low weights are given for numerical reasons.

mexp <- cm3 #Reuse model object from package qPCR

#Define model

mexp$expr <- "Fluo ~ mexp$fct(Cycles, c(M0, Lam))"
mexp$fct <- function(x, parm) {
  M0 <- parm[1]
  Lam <- parm[2]
  Fn <- vector(mode = "numeric", length = length(x))
  for (i in 1:length(x)) {
    if (i == 1) Fn[i] <- M0
    else {
      Fn[i] <- Fn[i - 1] * (1 + Lam*(1-Fn[i - 1]/K))
    }
  }
  return(Fn)
}
mexp$ssFct <- function(x, y) {
  ## start estimates
  M0 <- 1
  Lam <- 1
  ssVal <- c(M0, Lam)
  names(ssVal) <- mexp$parnames
  return(ssVal)
}
mexp$inv <- function(y, parm) {
  x <- 1:100
  fn <- function(x, parm) mexp$fct(x, parm) - y
  uniroot(fn, interval = c(1, 100), parm)$root
}
mexp$expr.grad <- expression(mexp$fct(Cycles, c(M0, Lam)))
mexp$parnames <- c("M0", "Lam")
mexp$name <- "mexp"
mexp$type <- "my exponential model"


#Weight cycles

weights_exp <- c(rep(0, 5), rep(1, 9), rep(1000, 6), rep(0, 41))

fitexp <- pcrfit(data_ampli, 1, 2, mexp,
                 weights = weights_exp)

res_exp <- a*fitexp$m$predict(newdata = fitexp$DATA) + b
mlog <- cm3 #Reuse model object from package qPCR

#Define model

mlog$expr <- "Fluo ~ mlog$fct(Cycles, c(M0, Lam))"
mlog$fct <- function(x, parm) {
  M0 <- parm[1]
  Lam <- parm[2]
  K <- max(ampli$RFU)
  Fn <- vector(mode = "numeric", length = length(x))
  for (i in 1:length(x)) {
    if (i == 1) Fn[i] <- M0
    else {
      if (Fn[i - 1] < K){Fn[i] <- Fn[i - 1] * (1 + Lam*(1-Fn[i - 1]/K)) }
      else {Fn[i] <- Fn[i - 1]}
    }
  }
  return(Fn)
}
mlog$ssFct <- function(x, y) {
  ## start estimates
  M0 <- 1
  Lam <- 1
  K <- max(ampli$RFU)
  ssVal <- c(M0, Lam)
  names(ssVal) <- mlog$parnames
  return(ssVal)
}
mlog$inv <- function(y, parm) {
  x <- 1:100
  fn <- function(x, parm) mlog$fct(x, parm) - y
  uniroot(fn, interval = c(1, 100), parm)$root
}
mlog$expr.grad <- expression(mlog$fct(Cycles, c(M0, Lam)))
mlog$parnames <- c("M0", "Lam")
mlog$name <- "mlog"
mlog$type <- "my logistic model"


#Weight cycles

weights_log <- c(rep(0, 5), rep(1, 9), rep(1000, 10), rep(1, 15), rep(0, 22))

fitlog <- pcrfit(data_ampli, 1, 2, mlog,
                 weights = weights_log)

res_log <- a*fitlog$m$predict(newdata = fitlog$DATA) + b

Figure: Fitted PCR models

ggplot()+
        geom_point(data = ampli, aes(Cycle-1, molecules))+
        geom_line(data = NULL, aes(0:60, res_log,
                                   col = "Logistic"),
                  linewidth = 0.75)+
        geom_point(data = ampli, aes(Cycle-1, molecules))+
        geom_line(data = NULL, aes(0:60, res_exp,
                                   col = "Exponential"),
                  linewidth = 0.75)+
        labs(x = "Cycle",
             y = "Estimated number of molecules", 
             col = "Amplification model")+
        theme(legend.justification = c(1, 0),
              legend.position = c(1, 0),
        legend.box.margin=margin(rep(10, 4)))+
  scale_colour_brewer(palette = "Set1")+
    # scale_y_log10()+
  coord_cartesian(xlim = c(0, 50), ylim = c(1, K))

#+
        # ggtitle(
        #   paste0("Real qPCR data and saturation models - Species : ",
        #                "Capsella bursa-pastoris"))+
        #   theme(plot.title = element_text(hjust = 0.5))

# ggsave(filename = "Figures/saturation_models2.pdf",
#        dpi = 600, units = "mm",
#        width = 150, height = 100, device = "pdf")

Digital droplet PCR (ddPCR) assays

ddPCR data

Dataframe summary_experiment contains all info concerning the concentration assays.

#QuantaSoft output file with additional data
dataddpcr <- read_csv("data/dataddpcr.csv")
## Rows: 85 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Well, Status, sp
## dbl (8): Concentration, Negatives, AcceptedDroplets, Sample, Dilu, plate, To...
## lgl (1): Used
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Used columns:
# Sample: Number of the plant sample
# Concentration: Assayed copies/ul in well
# sp: species
# TotalDNA: Total DNA concentration in ddPCR wells, ng/ul

#Open extraction data
#Used columns:
# species name
# Sample: Number of the plant sample
# CDNA_sample: Concentration of total DNA in the sample assayed by Qubit, ng/ul

extraction <- read.csv("data/extraction.csv")

Vdna_dd <- 5 #Volume of DNA in a well in ul
Vmix_dd <- 20 #Volume of mix in a well in ul

#summary_experiment contains all info concerning the concentration assays
#Molecules_ng is the number of target molecules / ng of total DNA
summary_experiment <- dataddpcr %>% group_by(Sample) %>%
  summarise(sp = sp[1],
            Molecules_ng = mean(Concentration/TotalDNA)) %>%
  left_join(extraction) %>%
  dplyr::select(Sample, species_name, sp, Molecules_ng, CDNA_sample)
## Joining with `by = join_by(Sample)`

Metabarcoding preparation

DNA concentration in the different communities

Uniform community (M_U)

CDNA_U <- 0.5
#Total DNA concentration in ng/ul for all plants
#in the U community (concentration in sample, not in pcr mix)
C_species_ref <- CDNA_U/13 #concentration of each species, ng/ul

summary_experiment <- summary_experiment %>%
  mutate(Volume_equi = max(Molecules_ng)/Molecules_ng,
         cDNA_U = C_species_ref*Volume_equi,
         #Concentration of total DNA to use in the U commu
         molecules_U = cDNA_U*Molecules_ng, #Molecules/ul in sample
         propU = molecules_U/sum(molecules_U), #species proportion
         Volume_equi = max(Molecules_ng)/Molecules_ng,
         #Volume to take to be as concentrated as 1 ul of Ptr 6 (most concentrated)
          RankG = 13 - rank(CDNA_sample/Volume_equi) + 1)

Number of molecules in the M_U community

VDNA_metab <- 2 #Volume of DNA used in the mix, ul
VDNA_metab * summary_experiment$molecules_U #Molecules of each species in mix
##  [1] 19084.31 19084.31 19084.31 19084.31 19084.31 19084.31 19084.31 19084.31
##  [9] 19084.31 19084.31 19084.31 19084.31 19084.31
VDNA_metab * sum(summary_experiment$molecules_U) #Total number of molecules
## [1] 248096

“Uniform in Total DNA” Communauty (M_T)

Number of molecules in the M_T community

summary_experiment <- summary_experiment %>%
  mutate(molecules_T = C_species_ref*Molecules_ng, #molecules/ul of sample
         propT = molecules_T/sum(molecules_T)) #species proportions

VDNA_metab * summary_experiment$molecules_T #Molecules of each species in mix
##  [1] 11760.000 19084.308  4051.282  6695.385  5672.308  6896.410 10360.237
##  [8]  9659.487 16280.000  2964.103  6330.256  2883.282 10806.154
VDNA_metab * sum(summary_experiment$molecules_T) #total number of molecules
## [1] 113443.2

Geometric community (M_G)

Number of molecules in the M_G community (2-fold dilution)

summary_experiment <- summary_experiment %>%
  mutate(Volume_equi = max(Molecules_ng)/Molecules_ng,
         cDNA_G = CDNA_U/2^RankG*Volume_equi,
         molecules_G = cDNA_G*Molecules_ng,
         propG = molecules_G/sum(molecules_G))

summary_experiment$molecules_G * VDNA_metab #Molecules of each species in mix
##  [1]   7753.00000  15506.00000    121.14063   1938.25000   3876.50000
##  [6]     60.57031    969.12500 124048.00000  62024.00000    484.56250
## [11]    242.28125     30.28516  31012.00000
sum(summary_experiment$molecules_G) * VDNA_metab #Total number of molecules
## [1] 248065.7

Table 1 and Supplementary Table 1: barcodes

Information about the barcodes used with Sper01 marker.

barcodes <- read_excel("data/new_plant_positive_control.xlsx")[,c(2,4:6)]
## New names:
## • `` -> `...1`
names(barcodes) <- c("species_name", "sequence", "length", "gc_content")

barcodes[barcodes$species_name ==
            "Salvia_pratensis",]$sequence <- "atcctgttttctcaaaacaaaggttcaaaaaacgaaaaaaaaaaag"

barcodes <- barcodes %>% filter(species_name != "Rumex_acetosa")
#Species not used in this experiment
#Information about barcodes and extracted concentrations
barcodes %>% left_join(extraction[,-2])
## Joining with `by = join_by(species_name)`
## # A tibble: 30 × 5
##    species_name       sequence                     length gc_content CDNA_sample
##    <chr>              <chr>                         <dbl>      <dbl>       <dbl>
##  1 Taxus_baccata      atccgtattataggaacaataatttta…     41       24.4        7.94
##  2 Taxus_baccata      atccgtattataggaacaataatttta…     41       24.4       13.3 
##  3 Salvia_pratensis   atcctgttttctcaaaacaaaggttca…     45       26.7       20.8 
##  4 Salvia_pratensis   atcctgttttctcaaaacaaaggttca…     45       26.7       24.4 
##  5 Populus_tremula    atcctatttttcgaaaacaaacaaaaa…     68       25         33   
##  6 Populus_tremula    atcctatttttcgaaaacaaacaaaaa…     68       25         31.4 
##  7 Carpinus_betulus   atcctgttttcccaaaacaaataaaac…     61       27.9       13.1 
##  8 Carpinus_betulus   atcctgttttcccaaaacaaataaaac…     61       27.9        9.14
##  9 Fraxinus_excelsior atcctgttttcccaaaacaaaggttca…     39       33.3        6.56
## 10 Fraxinus_excelsior atcctgttttcccaaaacaaaggttca…     39       33.3       22.4 
## # ℹ 20 more rows

Figure 3: ddPCR assays

Target DNA concentrations of each sample

data_dd_fig <- dataddpcr %>%
  left_join(summary_experiment %>% dplyr::select(sp, species_name, RankG))
## Joining with `by = join_by(sp)`
order_short <- summary_experiment %>% dplyr::select(sp, RankG) %>%
  arrange(-RankG) %>% pull(sp)

highlight <- function(x, pat, color="black", family="") {
  ifelse(grepl(pat, x), glue("<b style='font-family:{family}; color:{color}'>{x}</b>"), x)
}

ggplot(data_dd_fig)+
  geom_point(aes(factor(sp, levels = order_short, ordered = T),
                 Concentration_copies_ng/1000,
                 shape = as.factor(TotalDNA)))+
  geom_point(data = summary_experiment,
             aes(as.factor(sp), Molecules_ng/1000),
             fill = "red", shape = 23, size = 3)+
  # ggtitle("Target DNA molecules at given Total DNA concentration")+
  labs(x = "Plant species",
       y = "Thousands of molecules per ng of total DNA\n",
       shape = TeX("Concentration (ng/$\\mu l$)"))+
  scale_x_discrete(labels= function(x) highlight(x, "Cb|Fe"))+
  # theme_bw()+
  theme(axis.text.x=element_markdown(),
        legend.title.align=0.5,
        legend.text.align = 0.5)

# ggsave(filename = "Figures/ddpcr_fig_clean.pdf",
#        dpi = 600, units = "mm",
#        width = 150, height = 100, device = "pdf")

# ggsave(filename = "Figures/ddpcr_fig_clean.png",
#        dpi = 600, units = "mm",
#        width = 150, height = 90)

Statistics from ddPCR assays:

Species Populus tremula and Rhododendron ferrugineum

data_dd_fig %>% filter(sp == "Ptr") %>%
  pull(Concentration_copies_ng) %>% summary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  236800  240000  251200  248096  255520  256960
data_dd_fig %>% filter(sp == "Ptr") %>%
  pull(Concentration_copies_ng) %>% sd
## [1] 9171.504
data_dd_fig %>% filter(sp == "Rfe") %>%
  pull(Concentration_copies_ng) %>% summary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   33152   33680   34912   37483   37176   50720
data_dd_fig %>% filter(sp == "Rfe") %>%
  pull(Concentration_copies_ng) %>% sd
## [1] 6695.412
summary_experiment$Molecules_ng %>% summary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   37483   73740   89653  113443  140480  248096

Metabarcoding results

Pipeline from raw fastq data: see data_processing.Rmd file (we used the obitools softwares).

Open data, filter bad PCR replicates:

df_Sper01 <- read_csv("data/df_Sper01.csv")
## Rows: 74708 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Sample, id, sequence, Community, species_name
## dbl (8): obiclean_weight, reads, Spike_level, total_count_sample, count, seq...
## lgl (1): NegControl
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
summary_sample <- df_Sper01 %>%
  group_by(Sample) %>%
  summarise(total_count_sample = sum(reads),
            NegControl = NegControl[1],
            Spike_level = Spike_level[1],
            Community = Community[1]) %>%
  arrange(total_count_sample)

summary(summary_sample$total_count_sample)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     414   12247   31505   35305   54730  108166
summary(summary_sample %>%
          filter(!NegControl) %>% pull(total_count_sample))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1068   13952   33712   37023   55612  108166
sd(summary_sample %>%
          filter(!NegControl) %>% pull(total_count_sample))
## [1] 26763.09
reads_threshold <- 5000 #Replicates with less than 5000 reads are removed

ggplot(summary_sample %>%
          filter(!NegControl))+
  geom_point(aes(x = 1:nrow(summary_sample %>%
          filter(!NegControl)),
                 y = total_count_sample,
                 shape = Community))+
  labs(col = "Spike_level")+
  scale_y_log10()+
  geom_hline(yintercept = reads_threshold)

Data preparation

Observed and expected proportions in final composition.

Community M_U

df_targetU <- df_Sper01 %>%
  filter(Community == "U" &
           species_name %in% barcodes$species_name &
                          total_count_sample > reads_threshold &
                          !NegControl) %>%
  mutate(prop = obiclean_weight/total_count_sample) %>%
  left_join(summary_experiment %>% dplyr::select(species_name, sp, propU)) %>%
  rename(prop_expected = propU)
## Joining with `by = join_by(species_name)`

Community M_T

df_targetT <- df_Sper01 %>%
  filter(Community == "T" &
           species_name %in% barcodes$species_name &
           total_count_sample > reads_threshold &
           !NegControl) %>%
  mutate(prop = obiclean_weight/total_count_sample) %>%
  left_join(summary_experiment %>% dplyr::select(species_name, sp, propT)) %>%
  rename(prop_expected = propT)
## Joining with `by = join_by(species_name)`

Community M_G

df_targetG <- df_Sper01 %>%
  filter(Community == "G" &
           species_name %in% barcodes$species_name &
           total_count_sample > reads_threshold &
           !NegControl) %>%
  mutate(prop = obiclean_weight/total_count_sample) %>%
  left_join(summary_experiment %>% dplyr::select(species_name, sp, propG)) %>%
  rename(prop_expected = propG)
## Joining with `by = join_by(species_name)`

Figure 4: observed and expected proportions

colp <- c("Expected" = "gold",
          "Expected\nwithout\nPCR bias" = "gold",
          "Expected\nwithout\nPCR and\nconcentration\nbiases" = "gold")
          #"Inferred\nwith model" = "darkolivegreen2")

gobsU1 <- ggplot(df_targetU)+
  geom_boxplot(aes(factor(sp, levels = order_short, ordered = T),
                   prop))+
      geom_point(data = summary_experiment,
             aes(sp, 1/nrow(summary_experiment),
                 fill = "Expected\nwithout\nPCR bias"),
             shape = 23,
             size = 3)+
    geom_hline(aes(yintercept = 1/nrow(summary_experiment),
                col = "Expected\nwithout\nPCR and\nconcentration\nbiases"),
             linetype = "dashed")+
  ggtitle(TeX("Uniform community (${{M}_U}$)"))+
  labs(y = "", x = "", fill = "", col = "")+
  ylim(min(df_targetT$prop), max(df_targetT$prop)) +
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x=element_markdown(),
        legend.title.align=0.5)+
  scale_color_manual(values = colp)+
  scale_fill_manual(values = colp)+
  scale_x_discrete(labels= function(x) highlight(x, "Cb|Fe"))
  

gobsT1 <- ggplot(df_targetT)+
  geom_boxplot(aes(factor(sp, levels = order_short, ordered = T),
                   prop))+
    geom_point(data = summary_experiment,
             aes(sp, propT, fill = "Expected\nwithout\nPCR bias"),
             shape = 23,
             size = 3)+
  geom_hline(aes(yintercept = 1/nrow(summary_experiment),
                col = "Expected\nwithout\nPCR and\nconcentration\nbiases"),
             linetype = "dashed")+
  ggtitle(TeX("Uniform in Total DNA community (${{M}_T}$)"))+
  labs(y = "", x = "", fill = "", col = "")+
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x=element_markdown(),
        legend.title.align=0.5)+
  scale_color_manual(values = colp)+
  scale_fill_manual(values = colp)+
    scale_x_discrete(labels= function(x) highlight(x, "Cb|Fe"))


# gobsG1 <- ggplot(df_targetG)+
#   geom_boxplot(aes(factor(sp, levels = order_short, ordered = T),
#                    prop))+
#   labs(y = "", x = "")+
#   ggtitle("Geometric community (G)")+
#   theme(plot.title = element_text(hjust = 0.5))+
#   geom_point(data = summary_experiment,
#              aes(sp, propG), shape = 23,
#              fill = "gold", size = 3)+
#   scale_y_log10()

fig <- ggpubr::ggarrange(gobsU1, gobsT1, #gobsG1,
                  ncol = 1,
                  common.legend = T,
                  legend = "right")
annotate_figure(fig,
# top = text_grob("Final abundances in two mock communities with Sper01"),
                bottom = text_grob("Species"),
                left = text_grob("Observed reads proportions", rot = 90))

# ggsave("Figures/metabar_results.pdf", units = "mm",
#        width = 150, height = 200, dpi = 600, device = "pdf")

Examples of proportions

Species Populus tremula

df_targetU %>% filter(sp == "Ptr") %>% pull(prop) %>% summary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.03278 0.07092 0.08630 0.08618 0.10383 0.13607
df_targetU %>% filter(sp == "Ptr") %>% pull(prop) %>% sd
## [1] 0.02675534
df_targetU %>% group_by(sp) %>% summarise(s = sd(prop), m = mean(prop)) %>%arrange(m)
## # A tibble: 13 × 3
##    sp          s      m
##    <chr>   <dbl>  <dbl>
##  1 Gro   0.00462 0.0241
##  2 Spr   0.00849 0.0267
##  3 Lxy   0.00417 0.0550
##  4 Aal   0.0301  0.0579
##  5 Bme   0.00964 0.0608
##  6 Fex   0.00629 0.0626
##  7 Lco   0.00770 0.0796
##  8 Cbe   0.0122  0.0809
##  9 Aca   0.0207  0.0827
## 10 Ptr   0.0268  0.0862
## 11 Rfe   0.0154  0.101 
## 12 Cbp   0.0129  0.106 
## 13 Rca   0.00872 0.171
df_targetG %>% group_by(sp) %>% summarise(s = sd(prop), m = mean(prop)) %>%arrange(m)
## # A tibble: 13 × 3
##    sp            s         m
##    <chr>     <dbl>     <dbl>
##  1 Aal   0.0000299 0.0000509
##  2 Rfe   0.000123  0.000177 
##  3 Gro   0.000141  0.000228 
##  4 Cbe   0.000131  0.000312 
##  5 Aca   0.000473  0.00166  
##  6 Cbp   0.000485  0.00189  
##  7 Spr   0.00206   0.00625  
##  8 Fex   0.00150   0.00676  
##  9 Lxy   0.00181   0.00962  
## 10 Ptr   0.0228    0.0551   
## 11 Lco   0.00849   0.148    
## 12 Bme   0.0386    0.366    
## 13 Rca   0.0136    0.400

Ratio correction from a reference mock community (M_U)

This approach considers that the factor bias is independent to the whole community composition.

Correction factor computed from community M_U is:

a_s = prop_obs(s)/prop_init(s) (* constant value)

then prop_infer_mock(s) = prop_obs(s)/a_s(M_U)

where prop_infer_mock are the inferred proportions established by this ratio correction.

order_sp <- summary_experiment %>% dplyr::select(species_name, RankG) %>%
  arrange(-RankG) %>% pull(species_name)

correcU <- df_targetU %>%
  group_by(species_name) %>%
  summarise(correc = median(prop)/prop_expected[1], sp = sp[1])

#M_U
df_targetU <- df_targetU %>% left_join(correcU)
## Joining with `by = join_by(species_name, sp)`
df_targetU$prop_infer_mock <- df_targetU$prop/df_targetU$correc
df_targetU <- df_targetU %>%
  group_by(Sample) %>%
  mutate(prop_infer_mock = prop_infer_mock/sum(prop_infer_mock,
                                               na.rm = T))

df_targetU$species_name <- factor(df_targetU$species_name,
                                  levels = order_sp,
                                  ordered = T)

#M_T
df_targetT <- df_targetT %>% left_join(correcU)
## Joining with `by = join_by(species_name, sp)`
df_targetT$prop_infer_mock <- df_targetT$prop/df_targetT$correc
df_targetT <- df_targetT %>%
  group_by(Sample) %>%
  mutate(prop_infer_mock = prop_infer_mock/sum(prop_infer_mock,
                                               na.rm = T))
df_targetT$species_name <- factor(df_targetT$species_name,
                                  levels = order_sp,
                                  ordered = T)

#M_G
df_targetG <- df_targetG %>% left_join(correcU)
## Joining with `by = join_by(species_name, sp)`
df_targetG$prop_infer_mock <- df_targetG$prop/df_targetG$correc
df_targetG <- df_targetG %>%
  group_by(Sample) %>%
  mutate(prop_infer_mock = prop_infer_mock/sum(prop_infer_mock,
                                               na.rm = T))

df_targetG$species_name <- factor(df_targetG$species_name,
                                  levels = order_sp,
                                  ordered = T)
# ginferT1 <- ggplot(df_targetT)+
#   geom_boxplot(aes(factor(sp, levels = order_short, ordered = T),
#                    prop_infer_mock))+
#   geom_point(data = df_targetT %>% group_by(sp) %>%
#                summarise(prop = median(prop)),
#     aes(sp, prop, fill = "Observed\n(Median)"),
#              shape = 21, size = 3)+
#   theme(axis.text.x=element_blank())+
#   labs(y = "", x = "",
#        fill = "", col = "")+
#   geom_point(data = summary_experiment,
#              aes(sp, propT, fill = "Expected"), shape = 23,
#              size = 3)+
#   scale_fill_manual(values = colp)+
#   ggtitle("Uniform in Total DNA community (T)")+
#   theme(plot.title = element_text(hjust = 0.5))
# 
# ginferG1 <- ggplot(df_targetG)+
#   geom_boxplot(aes(factor(sp, levels = order_short, ordered = T),
#                    prop_infer_mock))+
#   geom_point(data = df_targetG %>% group_by(sp) %>%
#                summarise(prop = median(prop)),
#              aes(sp, prop, fill = "Observed\n(Median)"),
#              shape = 21, size = 3)+
#   labs(y = "", x = "",
#        fill = "", col = "")+
#   geom_point(data = summary_experiment,
#              aes(sp, propG, fill = "Expected"), shape = 23,
#              size = 3)+
#   scale_fill_manual(values = colp)+
#   ggtitle("Geometric community (G)")+
#   theme(plot.title = element_text(hjust = 0.5),
#         legend.position = "none")+
#   scale_y_log10()+
#   scale_x_discrete(labels= function(x) highlight(x, "Cb|Fe"))+
#   theme(axis.text.x=element_markdown())
# 
# fig <- egg::ggarrange(ginferT1, ginferG1,
#                       ncol = 1)
# annotate_figure(fig,
#                 top = text_grob("Corrected abundances in two mock communities with Sper01"),
#                 bottom = text_grob("Species"),
#                 left = text_grob("Inferred reads proportions", rot = 90))

# ggsave("Figures/metabar_results_corrected.pdf", units = "mm",
#        width = 180, height = 150, dpi = 600, device = "pdf")

Merge the results

resU1 <- df_targetU %>% group_by(species_name) %>%
  summarise(prop = median(prop), prop_expected = prop_expected[1],
            prop_infer_mock = median(prop_infer_mock))

resT1 <- df_targetT %>% group_by(species_name) %>%
  summarise(prop = median(prop), prop_expected = prop_expected[1],
            prop_infer_mock = median(prop_infer_mock))

resG1 <- df_targetG %>% group_by(species_name) %>%
  summarise(sp = sp[1], prop = median(prop), prop_expected = prop_expected[1],
            prop_infer_mock = median(prop_infer_mock))

Efficiencies inference

Export data to Julia

Export data to Julia to infer efficiencies with flimo

readsU <- df_targetU %>%
  dplyr::select(Sample, obiclean_weight, species_name) %>%
  pivot_wider(names_from = species_name,
              values_from = obiclean_weight) %>% ungroup %>% dplyr::select(-Sample)

qty_initU <- left_join(data.frame(species_name = colnames(readsU)),
                       summary_experiment %>%
                         dplyr::select(species_name, molecules_U)) %>%
              pull(molecules_U)
## Joining with `by = join_by(species_name)`
# write_csv(readsU, "data/reads_U.csv")
# write_csv(data.frame(qty_initU), "data/qty_initU.csv")

readsT <- df_targetT %>%
  dplyr::select(Sample, obiclean_weight, species_name) %>%
  pivot_wider(names_from = species_name,
              values_from = obiclean_weight) %>% ungroup %>% dplyr::select(-Sample)

qty_initT <- left_join(data.frame(species_name = colnames(readsU)),
                       summary_experiment %>%
                         dplyr::select(species_name, molecules_T)) %>%
              pull(molecules_T)
## Joining with `by = join_by(species_name)`
# write_csv(readsT, "data/reads_T.csv")
# write_csv(data.frame(qty_initT), "data/qty_initT.csv")

readsG <- df_targetG %>%
  dplyr::select(Sample, obiclean_weight, species_name) %>%
  pivot_wider(names_from = species_name,
              values_from = obiclean_weight,
              values_fill = 0) %>% ungroup %>% dplyr::select(-Sample)

qty_initG <- left_join(data.frame(species_name = colnames(readsU)),
                       summary_experiment %>%
                         dplyr::select(species_name, molecules_G)) %>%
              pull(molecules_G)
## Joining with `by = join_by(species_name)`
# write_csv(readsG, "data/reads_G.csv")
# write_csv(data.frame(qty_initG), "data/qty_initG.csv")

Proportions corrected with PCR efficiencies

Proportions inferred in Julia using PCR efficiencies

Run metabar_bias_functions.jl (functions), inference_pcr_efficiencies_ps.jl (inference of efficiencies from M_U and then proportions in M_T and M_G) and inference_taqman.jl (processing the taqman analysis)

Inference with the flimo method - reads in M_U used to infer efficiencies Lambda_s and K - then efficiencies used to infer proportions in M_T and M_G

Efficiencies

#Inferred efficiencies
efficiencies_infer <- t(read_csv("data/efficiencies_U.csv"))
## Rows: 13 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (1): Column1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
colnames(efficiencies_infer) <- colnames(readsU)

summary_experiment <- summary_experiment %>%
  left_join(efficiencies_infer %>%
              as_tibble() %>%
              pivot_longer(1:13, names_to = "species_name",
                           values_to = "efficiencies"))
## Joining with `by = join_by(species_name)`

Inferred proportions in M_T and M_G

Loading inferred proportions computed in Julia

inferT <- read.csv("data/prop_inferT.csv")
colnames(inferT) <- colnames(readsT)
resT1 <- resT1 %>%
  left_join(inferT %>% pivot_longer(everything(),
                                    names_to = "species_name",
                                    values_to = "prop_infer_Lambda")) %>%
  arrange(-dplyr::row_number())
## Joining with `by = join_by(species_name)`
inferG <- read.csv("data/prop_inferG.csv")
colnames(inferG) <- colnames(readsG)
resG1 <- resG1 %>%
  left_join(inferG %>% pivot_longer(everything(),
                                    names_to = "species_name",
                                    values_to = "prop_infer_Lambda")) %>%
  arrange(-dplyr::row_number())
## Joining with `by = join_by(species_name)`

Taqman assay

In this section, efficiencies of 3 species are estimated from a Taqman qPCR assay.

Open data (prepared in data_processing.Rmd)

res_taqman <- read.csv("data/res_taqman.csv")
kinetics <- read.csv("data/taqman_kinetics.csv")

Plot amplification curves and Cq = f(concentration)

ggplot(kinetics)+
  geom_line(aes(Cycle, RFU, group = Well, col = probe))

ggplot(res_taqman)+
  geom_point(aes(copies_ul, Cq, group = Well, col = probe))+
  scale_x_log10()+
  labs(y="Ct")+
  ggtitle("Ct = f(log(Concentration))")

First step: linear regression

Vmix_taq <- 25 #ul

res_taqman$slope <- NA
res_taqman$intercept <- NA

for (s in unique(res_taqman$probe)){
  reg <- lm(Cq ~ log10(copies),
            res_taqman %>%
              filter(probe == s) %>%
              group_by(Dilu) %>%
              summarise(Cq = mean(Cq),
                        copies = Vmix_taq*copies_ul[1]))
  res_taqman[res_taqman$probe == s, "slope"] <- reg$coefficients[2]
  res_taqman[
    res_taqman$probe == s, "intercept"] <- reg$coefficients[1]
}

res_taqman$rate <- 10^(-1/res_taqman$slope)-1

res_taqman$Mct <- 10^(-res_taqman$intercept/res_taqman$slope)

res_taqman %>% group_by(probe) %>%
  summarise(rate = rate[1],
            Mct = Mct[1]/1e11)
## # A tibble: 4 × 3
##   probe  rate   Mct
##   <chr> <dbl> <dbl>
## 1 CbeA  0.985  1.76
## 2 CbeB  0.979  2.31
## 3 Cbp   0.968  1.08
## 4 Fex   0.930  1.26

Try to infer with only 2 out of the 3 replicates : too important variability

flam <- function(p){
  10^(-1/p)-1
}

res_lm_test <- NULL

for (s in unique(res_taqman$probe)){
  for (try in 1:100){
    reg <- lm(Cq ~ log10(copies),
              res_taqman %>%
                filter(probe == s & !Neg) %>%
                group_by(Dilu) %>%
                summarise(Cq = mean(Cq[sample(1:n(), 2)]),
                          copies = Vmix_taq*copies_ul[1]))
    res_lm_test <- rbind(res_lm_test,
                         data.frame(probe = s,
                                    slope = reg$coefficients[2]))
  }
}

res_lm_test %>% group_by(probe) %>% summarise(min = min(slope),
                                              max = max(slope)) %>%
  mutate(lmin = flam(min), lmax = flam(max), gap = lmax - lmin)
## # A tibble: 4 × 6
##   probe   min   max  lmin  lmax    gap
##   <chr> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 CbeA  -3.39 -3.31 0.970 1.01  0.0367
## 2 CbeB  -3.40 -3.34 0.970 0.993 0.0222
## 3 Cbp   -3.45 -3.36 0.950 0.986 0.0356
<<<<<<< HEAD
## 4 Fex   -3.57 -3.45 0.905 0.949 0.0444
======= ## 4 Fex -3.57 -3.45 0.905 0.950 0.0447 >>>>>>> eb7f3054bbf0321141b9b1a66ddeff779bd19d4a

Infer constants of the model

RFU Threshold for each well

res_taqman$RFU_Cq <- NA

for (i in 1:nrow(res_taqman)){
  aux <- kinetics %>% filter(Dilu == res_taqman[i,]$Dilu &
                      Repli == res_taqman[i,]$Repli &
                      probe == res_taqman[i,]$probe &
                      abs(Cycle - res_taqman[i,]$Cq)<1)

res_taqman[i,"RFU_Cq"] <- (aux[2,]$RFU - aux[1,]$RFU)*
  (res_taqman[i,]$Cq-aux[1,]$Cycle)+aux[1,]$RFU
}

res_taqman <- res_taqman %>%
  mutate(K = Mct/(RFU_Cq/End.RFU))

Mct_glob <- res_taqman %>%
  group_by(sp) %>%
  summarise(Mct = mean(Mct)) %>% pull(Mct) %>% mean

K <- res_taqman %>%
  group_by(sp) %>%
  summarise(K = mean(K)) %>% pull(K) %>% sum

Computing PCR efficiencies

efficiencies <- res_taqman %>%
  mutate(Lambda = (Mct_glob/(Vmix_taq*copies_ul))^(1/Cq)-1)

ggplot(efficiencies)+
  geom_point(aes(probe, Lambda, col = factor(Dilu)))

efficiencies %>% group_by(probe) %>% summarise(Lambda = mean(Lambda))
## # A tibble: 4 × 2
##   probe Lambda
##   <chr>  <dbl>
## 1 CbeA   0.972
## 2 CbeB   0.947
## 3 Cbp    0.989
## 4 Fex    0.940
efficiencies %>% group_by(sp) %>% summarise(Lambda = mean(Lambda))
## # A tibble: 3 × 2
##   sp    Lambda
##   <chr>  <dbl>
## 1 Cbe    0.960
## 2 Cbp    0.989
## 3 Fex    0.940
#lam = Lambdas = efficiencies:
lam <- efficiencies %>% group_by(sp) %>%
  summarise(efficiency = mean(Lambda),
            sdeff = sd(Lambda))

res_taqman <- res_taqman %>% left_join(lam)
## Joining with `by = join_by(sp)`

Normalise End.RFU for Cbe

To compare the 2 probes for Carpinus betulus

x <- res_taqman %>% filter(sp == "Cbe" & Dilu < 6) %>%
  group_by(probe, Dilu) %>%
  summarise(end.rfu = mean(End.RFU), Cq = mean(Cq))
## `summarise()` has grouped output by 'probe'. You can override using the
## `.groups` argument.
correcCbe <- x %>% group_by(Dilu, probe) %>%
  summarise(end.rfu = mean(end.rfu)) %>%
  pivot_wider(names_from = probe, values_from = end.rfu) %>%
  mutate(correc = CbeA/CbeB)
## `summarise()` has grouped output by 'Dilu'. You can override using the
## `.groups` argument.
y <- kinetics %>% filter(sp == "Cbe" & !Neg) %>% left_join(correcCbe) %>%
  mutate(RFUbis = ifelse(probe == "CbeB", RFU*correc, RFU))
## Joining with `by = join_by(Dilu)`
ggplot(y)+
  geom_line(aes(Cycle, RFU, col = probe, group = Well,
                linetype = factor(Dilu)))

ggplot(y)+
  geom_line(aes(Cycle, RFUbis, col = probe, group = Well,
                linetype = factor(Dilu)))

z <- res_taqman %>% filter(sp == "Cbe" & !Neg)

z$RFU_Cq <- NA
for (i in 1:nrow(z)){
  a <- kinetics %>% filter(Dilu == z[i,]$Dilu &
                      Repli == z[i,]$Repli &
                      probe == z[i,]$probe &
                      abs(Cycle - z[i,]$Cq)<1)

z[i,"RFU_Cq"] <- (a[2,]$RFU - a[1,]$RFU)*
  (z[i,]$Cq-a[1,]$Cycle)+a[1,]$RFU
}

seuil_Cq <- mean(z$RFU_Cq)

fCq <- function(ampli){
  c2 <- which(ampli >= seuil_Cq)[1]
  c1 <- c2-1
  Cq <- (seuil_Cq - ampli[c1]+(ampli[c2] - ampli[c1])*c1)/
    (ampli[c2] - ampli[c1])
  Cq
}

res <- NULL
for (p in unique(y$Well)){
  res <- rbind(res,
               data.frame(Well = p,
                          probe = y[y$Well == p,]$probe[1],
                          Cq = fCq(y[y$Well == p,]$RFU),
                          Cqbis = fCq(y[y$Well == p,]$RFUbis)))
}

ggplot(res_taqman %>% filter(!Neg & sp == "Cbe"))+
  geom_point(aes(Dilu, Cq, col = probe))

Amplification bias

Comparing the 3 species

qmet <- df_targetU %>%
  filter(species_name %in% c("Capsella_bursa-pastoris",
                             "Carpinus_betulus",
                             "Fraxinus_excelsior"))

ggplot(qmet)+
  geom_boxplot(aes(species_name,
                   prop))+
  theme(axis.text.x = element_text(angle = 90))

qmet %>% group_by(species_name) %>%
  summarise(prop = mean(prop)) %>% mutate(prop = prop/sum(prop))
## # A tibble: 3 × 2
##   species_name             prop
##   <ord>                   <dbl>
## 1 Carpinus_betulus        0.325
## 2 Capsella_bursa-pastoris 0.424
## 3 Fraxinus_excelsior      0.251

Table 2: Amplification efficiencies and proportions in M_U

Summarising previous information

resU1 %>% dplyr::select(species_name, prop_expected, prop)
## # A tibble: 13 × 3
##    species_name             prop_expected   prop
##    <ord>                            <dbl>  <dbl>
##  1 Rhododendron_ferrugineum        0.0769 0.105 
##  2 Abies_alba                      0.0769 0.0531
##  3 Carpinus_betulus                0.0769 0.0850
##  4 Geranium_robertianum            0.0769 0.0256
##  5 Capsella_bursa-pastoris         0.0769 0.108 
##  6 Acer_campestre                  0.0769 0.0803
##  7 Fraxinus_excelsior              0.0769 0.0613
##  8 Lonicera_xylosteum              0.0769 0.0550
##  9 Salvia_pratensis                0.0769 0.0261
## 10 Populus_tremula                 0.0769 0.0863
## 11 Lotus_corniculatus              0.0769 0.0785
## 12 Rosa_canina                     0.0769 0.172 
## 13 Briza_media                     0.0769 0.0578
#Efficiencies by Taqman
lam
## # A tibble: 3 × 3
##   sp    efficiency   sdeff
##   <chr>      <dbl>   <dbl>
## 1 Cbe        0.960 0.0141 
## 2 Cbp        0.989 0.00501
## 3 Fex        0.940 0.0108
#Normalized efficiencies
efficiencies %>% group_by(probe) %>%
    summarise(efficiency = mean(Lambda),
              sdeff = sd(Lambda)) %>%
  mutate(efficiency = efficiency/lam$efficiency[3]*0.905,
         sdeff = sdeff/lam$efficiency[3]*0.905)
## # A tibble: 4 × 3
##   probe efficiency   sdeff
##   <chr>      <dbl>   <dbl>
## 1 CbeA       0.936 0.00540
## 2 CbeB       0.912 0.00588
## 3 Cbp        0.953 0.00482
## 4 Fex        0.905 0.0104
summary_experiment %>% arrange(RankG) %>%
  dplyr::select(species_name, efficiencies)
## # A tibble: 13 × 2
##    species_name             efficiencies
##    <chr>                           <dbl>
##  1 Briza_media                     0.902
##  2 Rosa_canina                     1    
##  3 Lotus_corniculatus              0.927
##  4 Populus_tremula                 0.935
##  5 Salvia_pratensis                0.827
##  6 Lonicera_xylosteum              0.892
##  7 Fraxinus_excelsior              0.905
##  8 Acer_campestre                  0.931
##  9 Capsella_bursa-pastoris         0.954
## 10 Geranium_robertianum            0.818
## 11 Carpinus_betulus                0.929
## 12 Abies_alba                      0.897
## 13 Rhododendron_ferrugineum        0.950

Table 3: proportions in M_T and M_G

Inferred proportions in M_T and M_G

Proportions

Inferred with the ratio method or the new approach in this paper

resT1
## # A tibble: 13 × 5
##    species_name             prop prop_expected prop_infer_mock prop_infer_Lambda
##    <chr>                   <dbl>         <dbl>           <dbl>             <dbl>
##  1 Briza_media            0.0620        0.0851          0.0842            0.0829
##  2 Rosa_canina            0.262         0.144           0.123             0.125 
##  3 Lotus_corniculatus     0.114         0.0953          0.115             0.116 
##  4 Populus_tremula        0.210         0.168           0.198             0.203 
##  5 Salvia_pratensis       0.0385        0.104           0.118             0.114 
##  6 Lonicera_xylosteum     0.0269        0.0500          0.0385            0.0387
##  7 Fraxinus_excelsior     0.0429        0.0590          0.0568            0.0563
##  8 Acer_campestre         0.0793        0.0913          0.0781            0.0818
##  9 Capsella_bursa-pastor… 0.0322        0.0261          0.0245            0.0248
## 10 Geranium_robertianum   0.0209        0.0558          0.0653            0.0704
## 11 Carpinus_betulus       0.0276        0.0357          0.0258            0.0269
## 12 Abies_alba             0.0145        0.0608          0.0227            0.0212
## 13 Rhododendron_ferrugin… 0.0506        0.0254          0.0371            0.0389
resG1
## # A tibble: 13 × 6
##    species_name    sp       prop prop_expected prop_infer_mock prop_infer_Lambda
##    <chr>           <chr>   <dbl>         <dbl>           <dbl>             <dbl>
##  1 Briza_media     Bme   3.64e-1      0.500          0.536             0.530    
##  2 Rosa_canina     Rca   4.01e-1      0.250          0.200             0.202    
##  3 Lotus_cornicul… Lco   1.48e-1      0.125          0.160             0.162    
##  4 Populus_tremula Ptr   5.20e-2      0.0625         0.0516            0.0557   
##  5 Salvia_pratens… Spr   6.03e-3      0.0313         0.0199            0.0210   
##  6 Lonicera_xylos… Lxy   9.38e-3      0.0156         0.0147            0.0155   
##  7 Fraxinus_excel… Fex   6.84e-3      0.00781        0.00955           0.00951  
##  8 Acer_campestre  Aca   1.60e-3      0.00391        0.00170           0.00176  
##  9 Capsella_bursa… Cbp   1.92e-3      0.00195        0.00151           0.00154  
## 10 Geranium_rober… Gro   1.95e-4      0.000977       0.000643          0.000849 
## 11 Carpinus_betul… Cbe   3.03e-4      0.000488       0.000306          0.000337 
## 12 Abies_alba      Aal   4.50e-5      0.000244       0.0000718         0.0000470
## 13 Rhododendron_f… Rfe   1.45e-4      0.000122       0.000119          0.000141

Proportions with exponential model

Computation of neff for M_U (number of effective exponential cycles) with the truncated exponential model

The K value used is the one selected during the inference in flimo.

summary_experiment <- summary_experiment %>%
  left_join(resU1 %>% dplyr::select(species_name, prop) %>%
                  rename(prop_obsU = prop)) %>%
  left_join(resT1 %>% dplyr::select(species_name, prop) %>%
              rename(prop_obsT = prop)) %>%
  left_join(resG1 %>% dplyr::select(species_name, prop) %>%
              rename(prop_obsG = prop))
## Joining with `by = join_by(species_name)`
## Joining with `by = join_by(species_name)`
## Joining with `by = join_by(species_name)`
K <- 1.32e11

#mg stands for Geometric Mean

mg_lam_s_p1 <- exp(mean(log(summary_experiment$efficiencies+1))) #Mean of Lambda_s + 1

#M_U

mg_psU <- exp(mean(log(summary_experiment$prop_obsU))) #Mean of relative abundances ps

mg_m0sU <- exp(mean(log(VDNA_metab*summary_experiment$molecules_U))) #molecules/ug ; Mean of initial numbers of molecules m0s

neffU <- (log(mg_psU)+log(K)-log(mg_m0sU))/log(mg_lam_s_p1)

#M_T

mg_psT <- exp(mean(log(summary_experiment$prop_obsT)))
mg_m0sT <- exp(mean(log(VDNA_metab*summary_experiment$molecules_T))) #molecules/ug
neffT <- (log(mg_psT)+log(K)-log(mg_m0sT))/log(mg_lam_s_p1)

#M_G

mg_psG <- exp(mean(log(summary_experiment$prop_obsG)))
mg_m0sG <- exp(mean(log(VDNA_metab*summary_experiment$molecules_G))) #molecules/ug
neffG <- (log(mg_psG)+log(K)-log(mg_m0sG))/log(mg_lam_s_p1)

Cycles <- 1:40
res_exp <- NULL
for (cyc in Cycles){
    m0s <- summary_experiment$prop_obsU/
      (1+summary_experiment$efficiencies)^cyc
  psU <- m0s/sum(m0s)
  
      m0s <- summary_experiment$prop_obsT/
        (1+summary_experiment$efficiencies)^cyc
  psT <- m0s/sum(m0s)
  
      m0s <- summary_experiment$prop_obsG/
        (1+summary_experiment$efficiencies)^cyc
  psG <- m0s/sum(m0s)
  
  res_exp <- rbind(res_exp, data.frame(sp = summary_experiment$sp,
                                       cycle = cyc,
                                       psU = psU, psT = psT, psG = psG))

}

# res_exp %>% View

ggplot(res_exp)+
  geom_line(aes(cycle, psU, col = sp))+
  geom_hline(yintercept = 1/13)+
  geom_vline(xintercept = neffU)

ggplot(res_exp)+
  geom_line(aes(cycle, psT, col = sp))

ggplot(res_exp)+
  geom_line(aes(cycle, psG, col = sp))

Estimation error: RMSE values

RMSE (root mean squared error) to compare observed/corrected vs expected proportions Either absolute or relative error

AbsErr <- function(p_observed, p_expected){
  sqrt(sum((p_observed-p_expected)^2)/length(p_observed))
}

RelErr <- function(p_observed, p_expected){
  sqrt(sum(((p_observed-p_expected)/p_expected)^2)/length(p_observed))
}

#U
summary(resU1$prop)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.02565 0.05499 0.07852 0.07646 0.08630 0.17207
AbsErr(resU1$prop, resU1$prop_expected)
## [1] 0.03702942
RelErr(resU1$prop, resU1$prop_expected)
## [1] 0.4813825
#T
summary(resT1$prop)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01454 0.02758 0.04289 0.07551 0.07932 0.26184
AbsErr(resT1$prop, resT1$prop_expected)
## [1] 0.04465347
AbsErr(resT1$prop_infer_mock, resT1$prop_expected)
## [1] 0.01741129
AbsErr(resT1$prop_infer_Lambda, resT1$prop_expected)
## [1] 0.01837216
RelErr(resT1$prop, resT1$prop_expected)
## [1] 0.5269626
RelErr(resT1$prop_infer_mock, resT1$prop_expected)
## [1] 0.2631424
RelErr(resT1$prop_infer_Lambda, resT1$prop_expected)
## [1] 0.2798953
#RMSE of T if it is considered as a uniform community
AbsErr(resT1$prop, resU1$prop_expected)
## [1] 0.07382099
RelErr(resT1$prop, resU1$prop_expected)
## [1] 0.9596729
#G
summary(resG1$prop)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000450 0.0003027 0.0060311 0.0762360 0.0519728 0.4009659
AbsErr(resG1$prop, resG1$prop_expected)
## [1] 0.05724279
AbsErr(resG1$prop_infer_mock, resG1$prop_expected)
## [1] 0.02026261
AbsErr(resG1$prop_infer_Lambda, resG1$prop_expected)
## [1] 0.01914612
RelErr(resG1$prop, resG1$prop_expected)
## [1] 0.4930014
RelErr(resG1$prop_infer_mock, resG1$prop_expected)
## [1] 0.3358845
RelErr(resG1$prop_infer_Lambda, resG1$prop_expected)
## [1] 0.3322317

Inference at neffU : if the amplification is exponential and is stopped after neffU cycles, the estimated proportions are psU, psT and psG (below). Associated RMSE.

m0s <- summary_experiment$prop_obsU/
  (1+summary_experiment$efficiencies)^neffU
psU <- m0s/sum(m0s)
  
m0s <- summary_experiment$prop_obsT/
  (1+summary_experiment$efficiencies)^neffU
psT <- m0s/sum(m0s)
  
m0s <- summary_experiment$prop_obsG/
  (1+summary_experiment$efficiencies)^neffU
psG <- m0s/sum(m0s)

AbsErr(psU, summary_experiment$propU)
## [1] 0.003087341
RelErr(psU, summary_experiment$propU)
## [1] 0.04013544
AbsErr(psT, summary_experiment$propT)
## [1] 0.01808665
RelErr(psT, summary_experiment$propT)
## [1] 0.2917926
AbsErr(psG, summary_experiment$propG)
## [1] 0.01740845
RelErr(psG, summary_experiment$propG)
## [1] 0.3277107

###Associated biodiversity measures: Hill numbers

Computing the Hill numbers to evaluate the influence of correction on ecological conclusions.

#Hill
D <- function(ps, q){
  ps <- ps/sum(ps)
  ps <- ps[ps>0]
  ifelse(q == 1, exp(-sum(ps*log(ps))), sum(ps^q)^(1/(1-q)))
}

#Community T

D(resT1$prop_expected, 1)
## [1] 11.26119
D(resT1$prop, 1)
## [1] 8.868797
D(resT1$prop_infer_mock, 1)
## [1] 10.64034
D(resT1$prop_infer_Lambda, 1)
## [1] 10.63284
D(resT1$prop_expected, 2)
## [1] 10.02984
D(resT1$prop, 2)
## [1] 6.647851
D(resT1$prop_infer_mock, 2)
## [1] 9.125834
D(resT1$prop_infer_Lambda, 2)
## [1] 9.105885
#Community G

D(resG1$prop_expected, 1)
## [1] 3.995114
D(resG1$prop, 1)
## [1] 3.70669
D(resG1$prop_infer_mock, 1)
## [1] 3.733344
D(resG1$prop_infer_Lambda, 1)
## [1] 3.806799
D(resG1$prop_expected, 2)
## [1] 2.999268
D(resG1$prop, 2)
## [1] 3.089144
D(resG1$prop_infer_mock, 2)
## [1] 2.784234
D(resG1$prop_infer_Lambda, 2)
## [1] 2.845309

Plot proportions for G

ginferG1 <- ggplot(df_targetG)+
  geom_boxplot(aes(factor(sp, levels = order_short,
                          ordered = T),
                   prop))+
    geom_point(data = summary_experiment,
             aes(sp, propG, fill = "Expected"), shape = 23,
             size = 2)+
    geom_point(data = resG1, aes(sp, prop_infer_Lambda, fill = "Inferred\nwith model"),
             shape = 23, size = 2)+
  labs(y = "Proportions", x = "Species",
       fill = "", col = "")+
  scale_fill_manual(values = colp)+
  ggtitle("Abundances in the Geometric community (G)")+
  theme(plot.title = element_text(hjust = 0.5))+
  scale_y_log10()+
  scale_x_discrete(labels= function(x) highlight(x, "Cb|Fe"))+
  theme(axis.text.x=element_markdown())

ginferG1

# ggsave("Figures/metabar_results_corrected_G.png", units = "mm",
#        width = 150, height = 90, dpi = 600)

Figure 5: Simulations: impact of efficiency variations

Simulated amplification to quantify the effect of efficiency differences.

Preparing simulations

pcr_sim : Function to simulate PCR with multiple species

pcr_sim <- function(m0=1,
                    lambda=rep(1, length(m0)),
                    K=1e11,
                    cycles = 40,
                    satu = c("log", "meca"), #meca not introduced in paper
                    c = 0.9, #for meca
                    e = 1.1, #for meca
                    delta = 1) { #for initial overdispersion
  param <- data.frame(m0 = m0,
                      lambda = lambda)
  nspecies <- nrow(param)
  
  #Number of molecules at each cycle for each species
  kinetics <- matrix(0, nrow = cycles+1, ncol = nspecies)
  if (delta <= 1){
    for (m in 1:nspecies){
      kinetics[1,m] <- rpois(1, m0[m])
    }
  } else {
    for (m in 1:nspecies){
      kinetics[1,m] <- rnbinom(1, m0[m]/delta, 1/delta)
    }
  }
  for (cyc in 2:(1+cycles)) {
    charge <- sum(kinetics[cyc-1,]-kinetics[1,])/K
    for (m in 1:nspecies) {
      lam <- lambda[m]
      mc <- kinetics[cyc-1,m] #Molecules of s at previous cycle
      if(satu[1] == "meca"){ #This model is not introduced in the paper
        gamma <- function(x){
          2/(1+sqrt(1-4*c^2*x*(1-x)))
        }
        lam <- lam*c*gamma(charge)*(1-charge)*(e-charge)
      }
      else{ #logistic
        lam <- max(0, lam*(1-charge))
      }
      #Number of created molecules
      nb <- rbinom(1, mc, lam)
      kinetics[cyc,m] <- kinetics[cyc-1,m]+nb
    }
  }
  return(kinetics)
}

nspecies <- 2
ncycles <- 40
M0 = rep(2.5e5/nspecies, nspecies) #Total number of molecules: 2.5e5

#Tested values
lam1 <- 1 #Efficiency of species 1
Lam2 <- seq(0.8, 1, by = 0.001) #Different efficiencies of species 2

res <- NULL
for (lam2 in Lam2){
  p <- pcr_sim(m0=M0, lambda=c(lam1, lam2), K=K,
                cycles = ncycles)
  res <- rbind(res, p[ncycles+1,])
}

#Proportions of the 2 species
prop2 <- df_targetU %>% group_by(sp) %>%
  summarise(propU = median(prop)) %>% ungroup() %>%
  mutate(propU = propU/(propU+propU[11])) %>% #11 = Rca
  left_join(summary_experiment %>% dplyr::select(sp, efficiencies)) %>%
  mutate(deff = 1-efficiencies) %>% filter(sp != "Rc")
## Joining with `by = join_by(sp)`

Figure:

#Labels positions
prop2$labelx <- prop2$deff - 0.0021
prop2[prop2$sp == "Rfe","labelx"] <- prop2[prop2$sp == "Rfe","labelx"]+
  0.0042
prop2[prop2$sp == "Lco","labelx"] <- prop2[prop2$sp == "Lco","labelx"]+
  0.0042
prop2[prop2$sp == "Lxy","labelx"] <- prop2[prop2$sp == "Lxy","labelx"]+
  0.0042
prop2[prop2$sp == "Aal","labelx"] <- prop2[prop2$sp == "Aal","labelx"]+
  0.0038
prop2[prop2$sp == "Bme","labelx"] <- prop2[prop2$sp == "Bme","labelx"]+
  0.0037
prop2[prop2$sp == "Cbe","labelx"] <- prop2[prop2$sp == "Cbe","labelx"]+
  0.0021


ggplot()+
    geom_vline(data = prop2 %>% filter(sp != "Rca"), aes(xintercept = deff),
             col = "grey80")+
    geom_line(aes(1-Lam2/lam1, res[,1]/(rowSums(res)), col = "Rosa canina"))+
  geom_line(aes(1-Lam2/lam1, res[,2]/(rowSums(res)), col = "other"))+
  geom_hline(yintercept = 0.5, linetype = "dashed")+
  # ggtitle("Simulated mock communities of two species with equal starting quantities")+
  # theme(plot.title = element_text(hjust = 0.5))+
  geom_point(data = prop2 %>% filter(sp != "Rca"),
             aes(deff, propU, col = "other"))+
  geom_point(data = prop2 %>% filter(sp != "Rca"),
             aes(deff, 1-propU, col = "Rosa canina"))+
  geom_text(aes(0.135, 0.73, label = "Rosa canina", col = "Rosa canina"),
            size = 4)+
  geom_text(aes(0.135, 0.27, label = "Second species", col = "other"),
            size = 4)+
  geom_text(data = prop2 %>%
              filter(sp != "Rca") %>% arrange(efficiencies),
              aes(labelx, 1, label = sp),
            col = "grey50",
            size = 3, angle = 90)+
    geom_text(data = prop2 %>% filter(sp %in% c("Cbe", "Cbp", "Fex")) %>%
                arrange(efficiencies),
              aes(labelx, 1, label = sp),
            col = "grey25",
            size = 3, angle = 90)+
  labs(x = "Relative decrease of PCR efficiency compared to Rosa canina",
       y = "Final relative abundances", col = "Species")+
  coord_cartesian(xlim = c(0.006, 0.185), ylim = c(0,1))+
  theme(legend.position = "none")+
  scale_colour_brewer(palette = "Set1")
<<<<<<< HEAD

=======

>>>>>>> eb7f3054bbf0321141b9b1a66ddeff779bd19d4a
  #   geom_text_repel(
  #   data = prop2 %>% filter(sp != "Rca"),
  #   aes(deff, propU, label = sp),
  #   size = 4,
  #   min.segment.length = unit(0, 'lines'))

# rep(c(1,0.95), 6)

# ggsave(filename = "Figures/simu_efficiencies.pdf", units = "mm",
#        height = 100, width = 150, device = "pdf", dpi = 600)

Supplementary Table 1: C-values

Mass of one genome for the 13 species.

summary_experiment$Cvalue <- c(0.46, 0.45, 1.03, 0.84, 0.70, 17.29,
                               1.02, 6.35, 1.40, 0.40, 1.76, 0.74, 0.87)

summary_experiment <- summary_experiment %>%
  mutate(copies_genome = Molecules_ng*1e9*Cvalue*1e-12)

summary_experiment
## # A tibble: 13 × 21
##    Sample species_name         sp    Molecules_ng CDNA_sample Volume_equi cDNA_U
##     <dbl> <chr>                <chr>        <dbl>       <dbl>       <dbl>  <dbl>
##  1      4 Salvia_pratensis     Spr        152880        24.4         1.62 0.0624
##  2      6 Populus_tremula      Ptr        248096        31.4         1    0.0385
##  3     10 Carpinus_betulus     Cbe         52667.        9.14        4.71 0.181 
##  4     12 Fraxinus_excelsior   Fex         87040        22.4         2.85 0.110 
##  5     16 Lonicera_xylosteum   Lxy         73740        45.8         3.36 0.129 
##  6     18 Abies_alba           Aal         89653.        3.58        2.77 0.106 
##  7     20 Acer_campestre       Aca        134683.       12.2         1.84 0.0708
##  8     22 Briza_media          Bme        125573.      183           1.98 0.0760
##  9     24 Rosa_canina          Rca        211640        50.8         1.17 0.0451
## 10     26 Capsella_bursa-past… Cbp         38533.       38.8         6.44 0.248 
## 11     28 Geranium_robertianum Gro         82293.       15           3.01 0.116 
## 12     30 Rhododendron_ferrug… Rfe         37483.        3.9         6.62 0.255 
## 13     32 Lotus_corniculatus   Lco        140480        65.2         1.77 0.0679
## # ℹ 14 more variables: molecules_U <dbl>, propU <dbl>, RankG <dbl>,
## #   molecules_T <dbl>, propT <dbl>, cDNA_G <dbl>, molecules_G <dbl>,
## #   propG <dbl>, efficiencies <dbl>, prop_obsU <dbl>, prop_obsT <dbl>,
## #   prop_obsG <dbl>, Cvalue <dbl>, copies_genome <dbl>

Supplementary Table 2: initial communities composition

summary_experiment %>% arrange(RankG)
## # A tibble: 13 × 21
##    Sample species_name         sp    Molecules_ng CDNA_sample Volume_equi cDNA_U
##     <dbl> <chr>                <chr>        <dbl>       <dbl>       <dbl>  <dbl>
##  1     22 Briza_media          Bme        125573.      183           1.98 0.0760
##  2     24 Rosa_canina          Rca        211640        50.8         1.17 0.0451
##  3     32 Lotus_corniculatus   Lco        140480        65.2         1.77 0.0679
##  4      6 Populus_tremula      Ptr        248096        31.4         1    0.0385
##  5      4 Salvia_pratensis     Spr        152880        24.4         1.62 0.0624
##  6     16 Lonicera_xylosteum   Lxy         73740        45.8         3.36 0.129 
##  7     12 Fraxinus_excelsior   Fex         87040        22.4         2.85 0.110 
##  8     20 Acer_campestre       Aca        134683.       12.2         1.84 0.0708
##  9     26 Capsella_bursa-past… Cbp         38533.       38.8         6.44 0.248 
## 10     28 Geranium_robertianum Gro         82293.       15           3.01 0.116 
## 11     10 Carpinus_betulus     Cbe         52667.        9.14        4.71 0.181 
## 12     18 Abies_alba           Aal         89653.        3.58        2.77 0.106 
## 13     30 Rhododendron_ferrug… Rfe         37483.        3.9         6.62 0.255 
## # ℹ 14 more variables: molecules_U <dbl>, propU <dbl>, RankG <dbl>,
## #   molecules_T <dbl>, propT <dbl>, cDNA_G <dbl>, molecules_G <dbl>,
## #   propG <dbl>, efficiencies <dbl>, prop_obsU <dbl>, prop_obsT <dbl>,
## #   prop_obsG <dbl>, Cvalue <dbl>, copies_genome <dbl>

Simulations of mock communities

100 simulated communities for each of the 3 M0tot values with 13 species

###Varying initial quantity, final abundances

The initial number of molecules varies in the M_U community:

#Different values of total number of molecules at the beginning
M0tot <- 2.5e5*c(1e-1, 1, 1e1)

nsim <- 100

compo <- NULL

for (sim in 1:nsim){
  for (m0tot in M0tot){
    #Composition of this community
    MU_varM0 <- pcr_sim(m0=rep(m0tot/nrow(summary_experiment),
                           nrow(summary_experiment)),
                    lambda=summary_experiment$efficiencies,
                    K=K,
                cycles = ncycles)
    compo <- rbind(compo, data.frame(sim = sim,
                                   M0tot = m0tot,
                                   sp = summary_experiment$sp,
                                   ps = MU_varM0[nrow(MU_varM0),]/
                                     sum(MU_varM0[nrow(MU_varM0),])))
  }
}

ggplot(compo %>% filter(sp %in% c("Rca", "Gro")))+
  geom_boxplot(aes(factor(M0tot), ps, col = sp))+
  geom_point(data = data.frame(),
             aes(factor(M0tot[2]),
                 (summary_experiment %>%
                    filter(sp %in% c("Rca", "Gro")) %>% pull(prop_obsU))),
  shape = 23, fill = "gold")+
  scale_color_brewer(palette = "Set1")
<<<<<<< HEAD

=======

>>>>>>> eb7f3054bbf0321141b9b1a66ddeff779bd19d4a
#adjust positions for real proportions...

compo %>% filter(sp %in% c("Rca", "Gro")) %>% group_by(M0tot, sp) %>%
  summarise(mps = mean(ps))
## `summarise()` has grouped output by 'M0tot'. You can override using the
## `.groups` argument.
## # A tibble: 6 × 3
## # Groups:   M0tot [3]
##     M0tot sp       mps
##     <dbl> <chr>  <dbl>
## 1   25000 Gro   0.0195
<<<<<<< HEAD
## 2   25000 Rca   0.192 
=======
## 2   25000 Rca   0.193 
>>>>>>> eb7f3054bbf0321141b9b1a66ddeff779bd19d4a
## 3  250000 Gro   0.0243
## 4  250000 Rca   0.171 
## 5 2500000 Gro   0.0300
## 6 2500000 Rca   0.152

Varying initial quantities, inferred abundances

Data are generated with the logistic model. Corrections are brought with our approach and with the ratio method (assuming exponential model)

compo_ref <- compo %>% filter(M0tot == 2.5e5) %>% group_by(sp) %>%
  summarise(med_ps = median(ps)) %>%
  mutate(correc = med_ps/(1/nrow(summary_experiment)))
#observed ps/expected ps
#with expected ps = 1/13

compo <- compo %>% left_join(summary_experiment[,c("sp", "species_name")]) %>%
  left_join(compo_ref) %>%
  mutate(ps_ratio = ps/correc)
## Joining with `by = join_by(sp)`
## Joining with `by = join_by(sp)`
compo <- compo %>% left_join(
    compo %>% group_by(sim, M0tot) %>%
      summarise(sum_ps_ratio = sum(ps_ratio)) ) %>%
  mutate(ps_ratio = ps_ratio/sum_ps_ratio) %>%
  dplyr::select(-sum_ps_ratio)
## `summarise()` has grouped output by 'sim'. You can override using the `.groups`
## argument.
## Joining with `by = join_by(sim, M0tot)`

Export to Julia (for inference)

for(i in 1:length(M0tot)){
  m0tot <- M0tot[i]
  mat <- compo %>% filter(M0tot == m0tot) %>%
    dplyr::select(sim, species_name, ps) %>%
    pivot_wider(names_from = species_name, values_from = ps) %>%
    dplyr::select(colnames(readsU)) %>% as.matrix()
  # write.csv(mat, paste0("data/sim100_m0tot_", i, ".csv"),row.names = F)
}
# df_targetU2 <- df_targetU %>% left_join(correcU)
# 
# ggplot(compo)+
#     geom_boxplot(data = df_targetU2, aes(sp, prop, col = "MU"),
#                  alpha = 0.2)+
#     geom_boxplot(data = df_targetU2, aes(sp, prop/correc, col = "MU_correc"),
#                  alpha = 0.2)+
#   geom_boxplot(aes(sp, ps, col = "sim"), alpha = 0.2)+
#   geom_boxplot(aes(sp, ps/correc, col = "sim_correc"), alpha = 0.2)+
#   facet_wrap(.~M0tot, ncol = 1)+
#   geom_hline(yintercept = 1/13)

Inference results computed in Julia

res_commu1 <- read.csv("data/res_commu1.csv")
res_commu2 <- read.csv("data/res_commu2.csv")
res_commu3 <- read.csv("data/res_commu3.csv")

colnames(res_commu1) <- colnames(readsU)
colnames(res_commu2) <- colnames(readsU)
colnames(res_commu3) <- colnames(readsU)

res_commu1 <- res_commu1 %>% as_tibble() %>%
  mutate(sim = 1:100, M0tot = 2.5e4) %>%
  pivot_longer(1:13, names_to = "species_name", values_to = "ps_log") %>%
  left_join(summary_experiment %>% dplyr::select(sp, species_name))
## Joining with `by = join_by(species_name)`
res_commu2 <- res_commu2 %>% as_tibble() %>%
  mutate(sim = 1:100, M0tot = 2.5e5) %>%
  pivot_longer(1:13, names_to = "species_name", values_to = "ps_log") %>%
  left_join(summary_experiment %>% dplyr::select(sp, species_name))
## Joining with `by = join_by(species_name)`
res_commu3 <- res_commu3 %>% as_tibble() %>%
  mutate(sim = 1:100, M0tot = 2.5e6) %>%
  pivot_longer(1:13, names_to = "species_name", values_to = "ps_log") %>%
  left_join(summary_experiment %>% dplyr::select(sp, species_name))
## Joining with `by = join_by(species_name)`
res_infer_log <- rbind(res_commu1, res_commu2, res_commu3)

Supplementary Figure 3

to compare the two approaches

ggplot(compo)+
  geom_boxplot(aes(factor(sp, levels = order_short, ordered = T),
                   ps_ratio,
                   group = sp, col = "Ratio"), alpha = 0.1)+
  geom_boxplot(data = res_infer_log, aes(sp, ps_log,
                   group = sp, col = "Logistic model"), alpha = 0.1)+
  geom_point(data = summary_experiment,
             aes(sp, 1/nrow(summary_experiment),
                 fill = "Expected\nwithout\nPCR bias"),
             shape = 23,
             size = 3)+
  facet_wrap(M0tot~., ncol = 1)+
  # scale_color_manual(values = colp)+
  scale_fill_manual(values = colp)+
  labs(x = "Species", y = "Corrected reads proportions",
       fill = "", col = "Correction")+
  scale_color_brewer(palette = "Set1")+
  ggtitle(TeX("$M_0^{{total}}$")) +
  theme(legend.title = element_text(hjust = 0.5),
    plot.title = element_text(hjust = 0.5),
    axis.text.x=element_markdown())+
  scale_x_discrete(labels = function(x) highlight(x, "Cb|Fe"))
<<<<<<< HEAD

=======

>>>>>>> eb7f3054bbf0321141b9b1a66ddeff779bd19d4a
# ggsave(filename = "Figures/simu_exp_log.pdf",
#        dpi = 600, units = "mm",
#        width = 150, height = 180, device = "pdf")

Supplementary Figures 1 and 2

###Data

param <- read.csv("data/res_infer_Lambda_K.csv")

colnames(param) <- c(colnames(readsU)[-1], "lK")

param$K <- 10^param$lK*1e13

median(param$K)/1e11
## [1] 1.325024
param <- param %>% pivot_longer(1:(nrow(summary_experiment)-1),
                                names_to = "species_name",
                                values_to = "efficiencies")

param <- param %>%
  left_join(summary_experiment %>% dplyr::select(species_name, sp))
## Joining with `by = join_by(species_name)`

###Supplementary Fig 1: K = f(Lambda_s)

ggplot(param)+
  geom_point(aes(K, efficiencies, col = sp))+
  # geom_density(aes(K, y = ..scaled..))+
  # scale_y_continuous(sec.axis = sec_axis(~./5-1, name="Density")) +
  # geom_hline(aes(yintercept = 1, col = "Rca"))+
  geom_segment(aes(x = min(param$K), xend = max(param$K), y = 1, yend = 1,
                   col = "Rca"))+
  geom_vline(xintercept = median(param$K), linetype = "dashed")+
  scale_x_log10()+
  labs(y = TeX("Efficiencies $\\Lambda_s$"), col = "Species")+
    theme(
        legend.title.align=0.5,
        legend.text.align = 0.5)

<<<<<<< HEAD
# ggsave(filename = "Figures/Lambda_fctK.pdf",
#        dpi = 600, units = "mm",
#        width = 150, height = 100, device = "pdf")
=======
ggsave(filename = "Figures/Lambda_fctK.pdf",
       dpi = 600, units = "mm",
       width = 150, height = 100, device = "pdf")
>>>>>>> eb7f3054bbf0321141b9b1a66ddeff779bd19d4a

###Supplementary Fig 2: hist(K)

ggplot(param)+
  geom_histogram(aes(K, y = ..count../sum(..count..)), bins = 15)+
  scale_x_log10()+
  labs(y = "Frequency")

# ggsave(filename = "Figures/histK.pdf",
#        dpi = 600, units = "mm",
#        width = 150, height = 100, device = "pdf")

Estimation of K from PCR composition

Molar mass of 1 dNTP: \(M=327g/mol\) in average

https://www.thermofisher.com/fr/fr/home/references/ambion-tech-support/rna-tools-and-calculators/dna-and-rna-molecular-weights-and-conversions.html

Molar concentration of the 4 dNTP in well: \(c_n=1 mM = 1.10^{-3}mol/l\)

https://www.thermofisher.com/fr/fr/home/life-science/cloning/cloning-learning-center/invitrogen-school-of-molecular-biology/pcr-education/pcr-reagents-enzymes/pcr-component-considerations.html

Quantity of primers in well: \(n_p= 20pmol\)/PCR ?

Volume of well: \(V = 20 \mu l\)

Constant of Avogadro \(N_A = 6.02214076\times 10^{23} \: mol^{-1}\)

Average length of a sequence: \(L=50\) dNTP

Thus, \(K=\frac{min(c_n\times V, \: n_p)}{L}\times N_A\)

This is the maximum number of molecules that can be created in a well. Around 1e14.

c_n = 1e-3 #concentration of nucleotides, mol/l
Vwell = 20e-6 #volume of a well, l
N_A = 6.02214076e23 #Avogadro
L = barcodes$sequence %>% nchar %>% mean
#mean length of a sequence, number of nucleotides

n_p = 1e-8 #quantity of primers, mol

Ksup = min(c_n*Vwell, n_p)/L*N_A
Ksup
## [1] 1.182357e+14